source('jupyterFunctions_perCellType.R')
CT <- 'stromal'
CT_label <- 'stromal'
data_prefix <- paste(sep='','../data/',CT,'/',CT)
ATAC_meta <- readRDS(paste(sep='',data_prefix,'_ATAC_meta.rds'))
chosenPeaks <- readRDS(paste(sep='',data_prefix,'_chosenPeaks.rds'))
snATAC_pxc_norm <- readRDS(paste(sep='',data_prefix,'_snATAC_pxc_norm.rds'))
snRNA_gxc_norm <- readRDS(paste(sep='',data_prefix,'_snRNA_gxc_norm.rds'))
snATAC_pxCT_norm <- readRDS(paste(sep='',data_prefix,'_snATAC_pxCT_norm.rds'))
snRNA_gxCT_norm <- readRDS(paste(sep='',data_prefix,'_snRNA_gxCT_norm.rds'))
chromVARz_mat <- readRDS(paste(sep='',data_prefix,'_ArchR_chromVARz_JASPAR2020.rds'))
ArchR_padj <- readRDS(paste(sep='',data_prefix,'_ArchR_padj_JASPAR2020.rds'))
CITE_meta <- readRDS(paste(sep='',data_prefix,'_CITE_meta.rds'))
class_state_df <- readRDS(paste(sep='',data_prefix,'_class_state_df.rds'))
LDA_res <- readRDS(paste(sep='',data_prefix,'_LDA_stats.rds'))
other_resol <- readRDS(paste(sep='',data_prefix,'_ATAC_otherRes.rds'))
CNA_CF <- readRDS(paste(sep='',data_prefix,'_CNA_CTAP-F.rds'))
DNAmethyl_vec <- readRDS(paste(sep='',data_prefix,'_DNAmethylation.rds'))
ATAC_pxc_norm <- readRDS(paste(sep='',data_prefix,'_ATAC_pxc_norm.rds'))
ATAC_colors <- readRDS('../data/misc/ATAC_class_colors.rds')
CITE_colors <- readRDS('../data/misc/CITE_state_colors.rds')
ATAC_CITE_conv_df <- readRDS('../data/misc/ATAC_CITE_sample_conversion.rds')
save_dir <- NA #'../output/' #or NA if don't want to save
#Fig 3a
options(repr.plot.height=6,repr.plot.width=12)
g <- ggplot(ATAC_meta,aes_string(x='UMAP1',y='UMAP2',color='cluster_name')) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_manual(values=ATAC_colors) +
labs(color=paste(sep='','RA ',CT_label,'\nchromatin class')) +
theme(legend.text=element_text(size=22)) +
guides(colour = guide_legend(override.aes = list(size=6,alpha=1)))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_class_UMAP.png'),
plot=g,units='in',height=6,width=12,dpi=600)
#Fig S3a
options(repr.plot.height=6,repr.plot.width=12)
g <- cellCount_bySample_barPlot(ATAC_meta,'sample','cluster_name',paste(sep='','RA ',CT_label,'\nchromatin class'),
ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_class_cellCount.png'),
plot=g,units='in',height=6,width=12,dpi=600)
chosenGenes <- names(chosenPeaks)
chosenPeaks <- chosenPeaks[!is.na(chosenPeaks)] #NA means no peak in gene's promoter
#Fig 3b bottom
genes_forUMAPs <- c('PRG4','CXCL12','CD34','MCAM')
if(!all(genes_forUMAPs %in% names(chosenPeaks))) stop('Genes for UMAP not in chosen genes')
multiome_cells <- rownames(ATAC_meta[which(ATAC_meta$assay=='snATAC'),])
options(repr.plot.height=2,repr.plot.width=7)
g <- plot_markerPeaks_norm_hex_v2(ATAC_meta[multiome_cells,],snRNA_gxc_norm[genes_forUMAPs,multiome_cells],'UMAP1','UMAP2',
plot_genes=genes_forUMAPs,plotCol=length(genes_forUMAPs),
titleSize=15,hex_bins=17,cutCap=0)
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerGene_UMAP.png'),
plot=g,units='in',height=2,width=7,dpi=600)
#Fig 3b top
toPlot <- snATAC_pxc_norm[unname(chosenPeaks[genes_forUMAPs]),multiome_cells]
rownames(toPlot) <- paste(sep='',names(chosenPeaks[genes_forUMAPs]),' peak')
options(repr.plot.height=2,repr.plot.width=7)
g <- plot_markerPeaks_norm_hex_v2(ATAC_meta[multiome_cells,],toPlot,'UMAP1','UMAP2',
plot_genes=rownames(toPlot),plotCol=nrow(toPlot),titleSize=15,hex_bins=17,cutCap=0,
titleFace='plain',colorOpt='plasma')
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerPeak_UMAP.png'),
plot=g,units='in',height=2,width=7,dpi=600)
class_order <- c('SA-1','SA-2','SA-0','SA-3')
all(class_order %in% ATAC_meta$cluster_abbr)
#Fig S3b
res <- scaleFeat_forHeatmap(chosenGenes,class_order,chosenPeaks,snRNA_gxCT_norm,snATAC_pxCT_norm)
snRNA_gxCT_norm_subset_scaled <- res$gxCT_norm_subset_scaled
snATAC_pxCT_norm_subset_scaled <- res$pxCT_norm_subset_scaled
fxCT_norm_subset_scaled <- res$fxCT_norm_subset_scaled
scale_lim <- max(abs(snRNA_gxCT_norm_subset_scaled),abs(snATAC_pxCT_norm_subset_scaled),na.rm=TRUE)
options(repr.plot.height=7,repr.plot.width=9)
g <- pseudobulk_scaled_heatmap(snRNA_gxCT_norm_subset_scaled,'Gene',paste('RA',CT_label,'chromatin class'),
'Scaled\nMean\nNormalized\nGene\nExpression',
plotTit=paste('Scaled Mean Normalized Gene Expression\nof multiome cells by RA',
CT_label,'chromatin classes'),
scale_lim=scale_lim,clustColors=ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerGene_heatmap.png'),
plot=g,units='in',height=7,width=9,dpi=600)
g <- pseudobulk_scaled_heatmap(snATAC_pxCT_norm_subset_scaled,'Peak',paste('RA',CT_label,'chromatin class'),
'Scaled\nMean\nNormalized\nPeak\nAccessibility',
plotTit=paste('Scaled Mean Normalized Peak Accessibility\nof multiome cells by RA',
CT_label,'chromatin classes'),
scale_lim=scale_lim,clustColors=ATAC_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_markerPeak_heatmap.png'),
plot=g,units='in',height=7,width=9,dpi=600)
pearR <- cor.test(fxCT_norm_subset_scaled$gene_norm_scale,fxCT_norm_subset_scaled$peak_norm_scale,
method='pearson')
fxCT_norm_subset_scaled$label <- ''
fxCT_norm_subset_scaled[which(fxCT_norm_subset_scaled$gene=='MMP1' & fxCT_norm_subset_scaled$cluster_abbr=='SA-0'),
'label'] <- 'MMP1'
fxCT_norm_subset_scaled[which(fxCT_norm_subset_scaled$gene=='ACTA2' & fxCT_norm_subset_scaled$cluster_abbr=='SA-3'),
'label'] <- 'ACTA2'
g <- ggplot(fxCT_norm_subset_scaled,
aes_string(x='gene_norm_scale',y='peak_norm_scale',color='cluster_abbr',label='label')) +
geom_point(size=2) + theme_classic(base_size=25) + scale_color_manual(values=ATAC_colors) +
labs(x='Scaled Mean Normalized\nGene Expression',
y='Scaled Mean Normalized\nPeak Accessibility',
color=paste(sep='','RA ',CT_label,'\nchromatin\nclass')) +
geom_abline(slope=1,intercept=0,linetype='dashed') +
ggtitle(paste(sep='','R=',round(pearR$estimate,2),' p-value=',signif(pearR$p.value,3))) +
theme(plot.title=element_text(hjust = 0.5)) + geom_text_repel(box.padding = 0.5,size=6.5,fontface='bold',seed=0)
suppressWarnings(print(g)) #points excluded if peak does not exist
if(!is.na(save_dir)) suppressWarnings(ggsave(file=paste(sep='',save_dir,CT,'_markerGenePeak_scatterplot.png'),
plot=g,units='in',height=7,width=9,dpi=600))
#Fig S3c
hyper_assoc_peaks <- names(DNAmethyl_vec[which(DNAmethyl_vec=='hypermethylated')])
hypo_assoc_peaks <- names(DNAmethyl_vec[which(DNAmethyl_vec=='hypomethylated')])
perCell_scores <- ATAC_perCell_score(ATAC_pxc_norm,hyper_assoc_peaks,hypo_assoc_peaks)
toPlot <- cbind(ATAC_meta, 'DNAmethyl_score'=perCell_scores[rownames(ATAC_meta)])
toPlot$cluster_abbr <- factor(toPlot$cluster_abbr,levels=class_order)
options(repr.plot.height=5,repr.plot.width=5)
g <- ggplot(toPlot,aes_string(x='cluster_abbr',y='DNAmethyl_score',fill='cluster_name')) + geom_violin() +
theme_bw(base_size=22) + scale_fill_manual(values=ATAC_colors) + theme(legend.position="none") +
labs(x=paste('RA',CT_label,'chromatin class'),
y='DNA Methylation Score\nHypo Hyper') +
geom_hline(yintercept=0,linetype='dashed',color='black')
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_DNAmethylation_scores.png'),
plot=g,units='in',height=5,width=5,dpi=600)
within <- toPlot[which(toPlot$cluster_abbr=='SA-0'),'DNAmethyl_score']
without <- toPlot[which(toPlot$cluster_abbr!='SA-0'),'DNAmethyl_score']
ll <- wilcox.test(within,without,alternative = "less")
ll
ll$p.value
#Fig 3c left
#fix cell names
split1 <- str_split_fixed(colnames(chromVARz_mat),'#',2)
new_colnames <- paste(sep='',split1[,1],'_',str_split_fixed(split1[,2],'-',2)[,1])
if(!identical(sort(new_colnames),sort(rownames(ATAC_meta)))) stop('cellnames not consistent b/t ATAC_meta and chromVAR')
colnames(chromVARz_mat) <- new_colnames
motif_toPlot <- 'FOS..JUND_341'
toPlot <- cbind(ATAC_meta,'motif'=chromVARz_mat[motif_toPlot,rownames(ATAC_meta)])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot,aes_string(x='UMAP1',y='UMAP2',color='motif')) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_gradient2(low='blue',mid='white',high='red',midpoint=0) +
labs(color=paste(sep='',str_split_fixed(motif_toPlot,'_',2)[,1],'\nchromVAR\ndeviations'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_motif_',str_replace(motif_toPlot,'\\.\\.','-'),'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig 3c right
#add hyphen back
if(!identical(sort(colnames(ArchR_padj)),sort(colnames(snRNA_gxCT_norm))) &
all(str_detect(colnames(ArchR_padj),'^[a-zA-Z]{2}[0-9]+$'))){
colnames(ArchR_padj) <- lapply(colnames(ArchR_padj),FUN=function(s){paste(sep='',substr(s,1,2),'-',
substr(s,3,nchar(s)))})
}
if(!identical(sort(colnames(ArchR_padj)),
sort(colnames(snRNA_gxCT_norm)))) stop('mxCT and gxCT matrices do not have same CT.')
options(repr.plot.height=6,repr.plot.width=12)
g <- ArchR_topMotifs_KWspin(ArchR_padj,snRNA_gxCT_norm,cOrd=class_order,cColors=ATAC_colors,
minE=5,num_mot=10,minGE=0.05,withinE=0.95,
mLab='Transcription Factor Motif',cLab=paste(sep='','RA ',CT_label,'\nchromatin class'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_motif_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
#Fig 3d left
gene_toPlot <- 'MMP3'
toPlot <- cbind(ATAC_meta[multiome_cells,],'gene'=snRNA_gxc_norm[gene_toPlot,multiome_cells])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot[order(toPlot$gene),],aes_string(x='UMAP1',y='UMAP2',color='gene')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_viridis(option = 'viridis') +
labs(color=paste(sep='',gene_toPlot,'\nNormalized\nGene\nExpression'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_gene_',gene_toPlot,'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig S8b
options(repr.plot.height=6,repr.plot.width=8)
g <- symp_prop_df(ATAC_meta[multiome_cells,],CITE_meta,
paste(sep='','Multiome ',CT_label,' cell state proportions\ninferred from snRNA (n=',
length(unique(ATAC_meta[multiome_cells,'sample'])),')'),
paste(sep='','AMP-RA ',CT_label,'\ncell state proportions (n=',
length(unique(CITE_meta$sample)),')'),
paste(sep='','AMP-RA\n',CT_label,'\ncell state'),clustColors=CITE_colors)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_CITE_state_prop.png'),
plot=g,units='in',height=6,width=8,dpi=600)
#Fig 7b left
options(repr.plot.height=6,repr.plot.width=6)
g <- ggplot(ATAC_meta[multiome_cells,],aes_string(x='UMAP1',y='UMAP2',color='CITE')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_manual(values=CITE_colors) +
theme(legend.position="none")
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_snATAC_state_UMAP.png'),
plot=g,units='in',height=6,width=6,dpi=600)
#setting order
class_conv_df <- unique(ATAC_meta[,c('cluster_name','cluster_abbr')])
rownames(class_conv_df) <- class_conv_df$cluster_abbr
full_class_order <- class_conv_df[class_order,'cluster_name']
class_state_df$class <- factor(class_state_df$class,levels=class_order)
state_order <- class_state_df[order(class_state_df$class,class_state_df$intOrd),'state']
state_conv_df <- unique(ATAC_meta[,c('CITE','CITE_abbr')])
rownames(state_conv_df) <- state_conv_df$CITE_abbr
full_state_order <- state_conv_df[state_order,'CITE']
#Fig 7b right
fisher_df <- calc_OR(ATAC_meta[multiome_cells,], 'cluster_name', 'CITE')
write.table(fisher_df[,c('cluster_name','CITE','OR','pval','padj','CI_low','CI_high')],
file=paste(sep='',save_dir,CT,'_class_state_OR_table.txt'),quote=FALSE,sep='\t',row.names=FALSE)
g <- plot_OR(fisher_df, 'cluster_name', 'CITE',
paste('RA',CT_label,'chromatin class'), paste('AMP-RA',CT_label,'cell state'),
full_class_order, full_state_order,
clustColors=c(ATAC_colors,CITE_colors))
options(repr.plot.height=6,repr.plot.width=12)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_class_state_OR_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
#Fig S11b
options(repr.plot.height=10,repr.plot.width=12)
g <- LDA_plots(LDA_res,CT,paste('AMP-RA',CT_label,'cell state'),
class_state_df=class_state_df,ctOrd_col='intOrd',ctOrd=class_order,
clustColors=c(CITE_colors,ATAC_colors))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_LDA_heatmap.png'),
plot=g,units='in',height=10,width=12,dpi=600)
#Fig S9b left
gene_toPlot <- 'CLIC5'
toPlot <- cbind(ATAC_meta[multiome_cells,],'gene'=snRNA_gxc_norm[gene_toPlot,multiome_cells])
options(repr.plot.height=6,repr.plot.width=8.5)
g <- ggplot(toPlot[order(toPlot$gene),],aes_string(x='UMAP1',y='UMAP2',color='gene')) +
geom_point(size=1,alpha=0.5) +
theme_classic(base_size=25) + scale_color_viridis(option = 'viridis') +
labs(color=paste(sep='',gene_toPlot,'\nNormalized\nGene\nExpression'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_gene_',gene_toPlot,'_UMAP.png'),
plot=g,units='in',height=6,width=8.5,dpi=600)
#Fig S10b top
if(!identical(sort(rownames(other_resol)),sort(rownames(ATAC_meta)))) stop('rownames need to be the same')
toPlot <- cbind(ATAC_meta,other_resol[rownames(ATAC_meta),])
for(cc in colnames(other_resol)){
cluster_colors <- hue_pal()(length(unique(toPlot[,cc])))
names(cluster_colors) <- sort(unique(toPlot[,cc]))
options(repr.plot.height=6,repr.plot.width=8)
g <- ggplot(toPlot[multiome_cells,],aes_string(x='UMAP1',y='UMAP2',color=cc)) + geom_point(size=1,alpha=0.5) +
theme_classic(base_size=20) + scale_color_manual(values=cluster_colors) +
guides(colour = guide_legend(override.aes = list(size=5,alpha=1)))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_',cc,'_UMAP.png'),
plot=g,units='in',height=6,width=8,dpi=600)
}
#Fig S10b bottom
if(!identical(sort(rownames(other_resol)),sort(rownames(ATAC_meta)))) stop('rownames need to be the same')
toPlot <- cbind(ATAC_meta,other_resol[rownames(ATAC_meta),])
for(cc in colnames(other_resol)){
cluster_colors <- hue_pal()(length(unique(toPlot[,cc])))
names(cluster_colors) <- sort(unique(toPlot[,cc]))
fisher_df <- calc_OR(toPlot[multiome_cells,], cc, 'CITE')
this_state_order <- full_state_order
clustOrd <- reorder_col_diag_plotOR(fisher_df,cc,'CITE',yOrd=this_state_order,mCol='lnOR',op='max')
g <- plot_OR(fisher_df, cc, 'CITE',
paste('RA',CT_label,'chromatin cluster\nat resolution',str_split_fixed(cc,'_',2)[,2]),
paste('AMP-RA',CT_label,'cell state'),
clustOrd, this_state_order,
clustColors=c(cluster_colors,CITE_colors))
options(repr.plot.height=6,repr.plot.width=12)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',cc,'_state_OR_heatmap.png'),
plot=g,units='in',height=6,width=12,dpi=600)
}
#Fig S12a
tVec <- lapply(sort(unique(ATAC_meta$cluster_name)),replace_space_newline_afterHalf,wiggle=5)
names(tVec) <- sort(unique(ATAC_meta$cluster_name))
options(repr.plot.height=4.25,repr.plot.width=4*length(unique(ATAC_meta$cluster_abbr)))
g <- donor_prop_comp_plot(ATAC_CITE_conv_df,ATAC_meta[which(ATAC_meta$assay=='scATAC'),],CITE_meta,
clustColors=ATAC_colors,tSize=18,tVec=tVec)
grid.draw(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_ATAC_CITE_donor_prop.png'),
plot=g,units='in',height=4.25,width=4*length(unique(ATAC_meta$cluster_abbr)),dpi=600)
#Fig S12c
CNA_CF_FDR <- 0.13
CNA_CF_globalp <- 0.0027
CNA_title <- 'CTAP-F'
CNA_col <- 'cna_corr_F'
class_col <- 'ATAC_cluster_name'
toPlot <- CNA_add_col(CITE_meta, CNA_CF, CNA_col)
options(repr.plot.height=6,repr.plot.width=7)
g <- CNA_umap_plots(toPlot,'ATAC_UMAP1','ATAC_UMAP2',CNA_col,
thisTitle=paste(CNA_title,'CNA correlations\n','p =',CNA_CF_globalp),
smallPt=TRUE,fdr_thresh=CNA_CF_FDR)
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',str_replace(CNA_title,' ','_'),'_CNA_UMAP.png'),
plot=g,units='in',height=6,width=7,dpi=600)
#only plot those classes whose median is above FDR thresholds
ll <- aggregate(toPlot[,CNA_col] ~ toPlot[,class_col], FUN=median)
colnames(ll) <- c(class_col,CNA_col)
CNA_classes <- ll[which(abs(ll[,CNA_col])>CNA_CF_FDR),class_col]
options(repr.plot.height=4,repr.plot.width=8)
g <- CNA_violin_plots(toPlot[which(toPlot[,class_col] %in% CNA_classes),],
class_col,CNA_col,CNA_CF_FDR,thisTitle=paste(CNA_title,'CNA'))
print(g)
if(!is.na(save_dir)) ggsave(file=paste(sep='',save_dir,CT,'_',str_replace(CNA_title,' ','_'),'_CNA_violin.png'),
plot=g,units='in',height=4,width=8,dpi=600)
sessionInfo()